{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "corpus length: 1115394\n",
      "total chars: 39\n",
      "nb sequences: 1115355\n",
      "Vectorization...\n",
      "vetorization completed\n"
     ]
    }
   ],
   "source": [
    "\n",
    "####\n",
    "\n",
    "#minesh.mathew@gmail.com\n",
    "#modified version of text generation example in keras; trained in a many-to-many fashion using a time distributed dense layer\n",
    "\n",
    "####\n",
    "from __future__ import print_function\n",
    "from keras.models import Sequential\n",
    "from keras.layers import Dense, Activation, Dropout\n",
    "from keras.layers import LSTM,TimeDistributed,SimpleRNN\n",
    "from keras.utils.data_utils import get_file\n",
    "import numpy as np\n",
    "from time import sleep\n",
    "import random\n",
    "import sys\n",
    "\n",
    "##uncomment below if you want to use nietzches writings as the corpus\n",
    "\n",
    "#path = get_file('nietzsche.txt', origin=\"https://s3.amazonaws.com/text-datasets/nietzsche.txt\")\n",
    "#text = open(path).read().lower()\n",
    "text = open('./textdatasets/tinyshakesepare.txt').read().lower()\n",
    "print('corpus length:', len(text))\n",
    "\n",
    "chars = sorted(list(set(text)))\n",
    "print('total chars:', len(chars))\n",
    "char_indices = dict((c, i) for i, c in enumerate(chars))\n",
    "indices_char = dict((i, c) for i, c in enumerate(chars))\n",
    "\n",
    "# split the corpus into sequences of length=maxlen\n",
    "#input is a sequence of 40 chars and target is also a sequence of 40 chars shifted by one position\n",
    "#for eg: if you maxlen=3 and the text corpus is abcdefghi, your input ---> target pairs will be\n",
    "# [a,b,c] --> [b,c,d], [b,c,d]--->[c,d,e]....and so on\n",
    "maxlen = 40\n",
    "step = 1\n",
    "sentences = []\n",
    "next_chars = []\n",
    "for i in range(0, len(text) - maxlen+1, step):\n",
    "    sentences.append(text[i: i + maxlen])\n",
    "    next_chars.append(text[i+1:i +1+ maxlen])\n",
    "    #if i<10 :\n",
    "       # print (text[i: i + maxlen])\n",
    "        #print(text[i+1:i +1+ maxlen])\n",
    "print('nb sequences:', len(sentences))\n",
    "\n",
    "print('Vectorization...')\n",
    "X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)\n",
    "y = np.zeros((len(sentences),maxlen, len(chars)), dtype=np.bool) # y is also a sequence , or  a seq of 1 hot vectors\n",
    "for i, sentence in enumerate(sentences):\n",
    "    for t, char in enumerate(sentence):\n",
    "        X[i, t, char_indices[char]] = 1\n",
    "\n",
    "for i, sentence in enumerate(next_chars):\n",
    "    for t, char in enumerate(sentence):\n",
    "        y[i, t, char_indices[char]] = 1\n",
    "    \n",
    "\n",
    "print ('vetorization completed')\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Build model...\n",
      "model is made\n"
     ]
    }
   ],
   "source": [
    "# build the model: 2 stacked LSTM\n",
    "print('Build model...')\n",
    "model = Sequential()\n",
    "#model.add(LSTM(512, return_sequences=True, input_shape=(maxlen, len(chars))))  # original one\n",
    "model.add(LSTM(512, input_shape=(maxlen, len(chars)),return_sequences=True)) #minesh witout specifying the input_length\n",
    "model.add(LSTM(512, return_sequences=True)) #- original\n",
    "model.add(Dropout(0.2))\n",
    "model.add(TimeDistributed(Dense(len(chars))))\n",
    "model.add(Activation('softmax'))\n",
    "\n",
    "model.compile(loss='categorical_crossentropy', optimizer='rmsprop')\n",
    "\n",
    "print ('model is made')\n",
    "\n",
    "# train the model, output generated text after each iteration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "lstm_1 (LSTM)                (None, 40, 512)           1130496   \n",
      "_________________________________________________________________\n",
      "lstm_2 (LSTM)                (None, 40, 512)           2099200   \n",
      "_________________________________________________________________\n",
      "dropout_1 (Dropout)          (None, 40, 512)           0         \n",
      "_________________________________________________________________\n",
      "time_distributed_1 (TimeDist (None, 40, 39)            20007     \n",
      "_________________________________________________________________\n",
      "activation_1 (Activation)    (None, 40, 39)            0         \n",
      "=================================================================\n",
      "Total params: 3,249,703.0\n",
      "Trainable params: 3,249,703.0\n",
      "Non-trainable params: 0.0\n",
      "_________________________________________________________________\n",
      "None\n"
     ]
    }
   ],
   "source": [
    "print (model.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "--------------------------------------------------\n",
      "Iteration 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python2.7/dist-packages/keras/models.py:826: UserWarning: The `nb_epoch` argument in `fit` has been renamed `epochs`.\n",
      "  warnings.warn('The `nb_epoch` argument in `fit` '\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "loss is\n",
      "1.2514607235\n",
      "<keras.callbacks.History object at 0x7fb70c7cc6d0>\n",
      "\n",
      "\n",
      "--------------------------------------------------\n",
      "Iteration 2\n",
      "loss is\n",
      "0.844818573744\n",
      "<keras.callbacks.History object at 0x7fb5c426de90>\n",
      "\n",
      "\n",
      "--------------------------------------------------\n",
      "Iteration 3\n",
      "loss is\n",
      "0.731364498821\n",
      "<keras.callbacks.History object at 0x7fb70c7cc6d0>\n",
      "\n",
      "\n",
      "--------------------------------------------------\n",
      "Iteration 4\n",
      "loss is\n",
      "0.675374876133\n",
      "<keras.callbacks.History object at 0x7fb5c426de90>\n",
      "\n",
      "\n",
      "--------------------------------------------------\n",
      "Iteration 5\n",
      "loss is\n",
      "0.639504391519\n",
      "<keras.callbacks.History object at 0x7fb5c41ff890>\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for iteration in range(1, 6):\n",
    "    print()\n",
    "    print('-' * 50)\n",
    "    print('Iteration', iteration)\n",
    "    history=model.fit(X, y, batch_size=128, nb_epoch=1,verbose=0)\n",
    "    sleep(0.1) # https://github.com/fchollet/keras/issues/2110\n",
    "    \n",
    "    # saving models at the following iterations -- uncomment it if you want tos save weights and load it later\n",
    "    #if iteration==1 or iteration==3 or iteration==5 or iteration==10 or iteration==20 or iteration==30 or iteration==50 or iteration==60 :\n",
    "    #    model.save_weights('Karpathy_LSTM_weights_'+str(iteration)+'.h5', overwrite=True)\n",
    "    #start_index = random.randint(0, len(text) - maxlen - 1)\n",
    "\n",
    "    #sys.stdout.flush()\n",
    "    print ('loss is')\n",
    "    print (history.history['loss'][0])\n",
    "    print (history)\n",
    "    print()    \n",
    "\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "#### testing\n",
    "now you use the trained model to generat text.\n",
    "the  output shown in this notebook is for a model which is trained only for 1 iteration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "seed string --> brutus: this aint right. you should be going\n",
      "The generated text is\n",
      "brutus: this aint right. you should be going.\n",
      "\n",
      "messenger:\n",
      "what is the bell of marcius' to got\n",
      "seem to the fire, and power in men\n",
      "i am too young to be your father,\n",
      "though they would swear shalt be avoided\n",
      "'twere properly as i swood and with briets,\n",
      "his warwick takes here and the lady bona send to him.\n",
      "now, for a worthy lady and my heart\n",
      "to have it prest'st not the king in lawful marriage.\n",
      "\n",
      "queen margaret:\n",
      "ay, now begins a second storm to rise;\n",
      "for this is he would show me thus for a worthy children hast thou, widow?\n",
      "\n",
      "lucio:\n",
      "\n",
      "isabella:\n",
      "i would to god all strifes were well compounded.\n",
      "my sovereign liege, no letters are made to come about me:\n",
      "the king's grown bankrupt, like a broken man.\n",
      "\n",
      "northumberland:\n",
      "my lord, the mayor of london comes to greet you.\n",
      "\n",
      "lord mayor:\n",
      "god bless your grace with health and warwick's daughter shall be thine.\n",
      "\n",
      "prince edward:\n",
      "my lord of york will still be cross in talk:\n",
      "upon the stroke of four.\n",
      "\n",
      "hastings:\n",
      "o this i can tell you.\n",
      "\n",
      "mopsa:\n",
      "we can both sing it: if thou dost obey:\n",
      "that shall be so first to be a s"
     ]
    }
   ],
   "source": [
    "seed_string=\"brutus: this aint right. you should be going\" \n",
    "print (\"seed string -->\", seed_string)\n",
    "print ('The generated text is')\n",
    "sys.stdout.write(seed_string),\n",
    "#x=np.zeros((1, len(seed_string), len(chars)))\n",
    "for i in range(1000):\n",
    "    x=np.zeros((1, len(seed_string[-40:]), len(chars)))\n",
    "    for tt, char in enumerate(seed_string[-40:]):\n",
    "        x[0, tt, char_indices[char]] = 1.\n",
    "    preds = model.predict(x, verbose=0)[0]\n",
    "    #print (np.argmax(preds[7]))\n",
    "    next_index=np.argmax(preds[len(seed_string[-40:])-1])\n",
    "    \n",
    "    \n",
    "    #next_index=np.argmax(preds[len(seed_string)-11])\n",
    "    #print (preds.shape)\n",
    "    #print (preds)\n",
    "    #next_index = sample(preds, 1) #diversity is 1\n",
    "    next_char = indices_char[next_index]\n",
    "    seed_string = seed_string + next_char\n",
    "    \n",
    "    #print (seed_string)\n",
    "    #print ('##############')\n",
    "    #if i==40:\n",
    "    #    print ('####')\n",
    "    sys.stdout.write(next_char)\n",
    "\n",
    "sys.stdout.flush()    \n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "39"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(seed_string)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
